library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## here() starts at C:/Users/starf/Documents/GitHub/BIS15W2021_ahearne
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(qtl)
library(qtlcharts)
library(tidyverse)
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(albersusa)
library(ggthemes)
1. We have a satellite collars on a number of different individuals and want to be able to quickly look at all of their recent movements at once. Please load all the data files from us_individual_collar_data and use for loop to create plots for all different individuals of the path they move on longitude and latitude.
us_ic_data <- list.files(path = "data/us_individual_collar_data", full.names=T)
us_ic_data
## [1] "data/us_individual_collar_data/collar-data-A1-2016-02-26.txt"
## [2] "data/us_individual_collar_data/collar-data-B2-2016-02-26.txt"
## [3] "data/us_individual_collar_data/collar-data-C3-2016-02-26.txt"
## [4] "data/us_individual_collar_data/collar-data-D4-2016-02-26.txt"
## [5] "data/us_individual_collar_data/collar-data-E5-2016-02-26.txt"
## [6] "data/us_individual_collar_data/collar-data-F6-2016-02-26.txt"
## [7] "data/us_individual_collar_data/collar-data-G7-2016-02-26.txt"
## [8] "data/us_individual_collar_data/collar-data-H8-2016-02-26.txt"
## [9] "data/us_individual_collar_data/collar-data-I9-2016-02-26.txt"
## [10] "data/us_individual_collar_data/collar-data-J10-2016-02-26.txt"
paths<-for (i in 1: length(us_ic_data)){
us_individual <- as.data.frame(read_csv(us_ic_data[i]))
print(
ggplot(us_individual, aes(x=long, y=lat))+
geom_path()+
geom_point()+
theme_solarized_2()+
theme(legend.position="right",
axis.text.x=element_text(angle=60, hjust=1))+
labs(title=us_ic_data[i],
x="Longitude",
y="Latitude")
)
}
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )

## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )

## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )

## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )

## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )

## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )

## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )

## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )

## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )


2. Please load all the data files from us_individual_collar_data and combine all data into one data frame. Create a summary to show what is the maximum and minimum of recorded data points on longitude and latitude.
us_ind_collar<-lapply(us_ic_data, read_csv)
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## date = col_date(format = ""),
## collar = col_character(),
## time = col_datetime(format = ""),
## lat = col_double(),
## long = col_double()
## )
names<-strsplit(us_ic_data, split=".txt")
names2<-unlist(names)
names(us_ind_collar)<-names2
full_data<-bind_rows(us_ind_collar)
full_data
## # A tibble: 240 x 6
## X1 date collar time lat long
## <dbl> <date> <chr> <dttm> <dbl> <dbl>
## 1 1 2016-02-26 A1 2016-02-26 00:00:00 37.0 -116.
## 2 2 2016-02-26 A1 2016-02-26 01:00:00 38.4 -117.
## 3 3 2016-02-26 A1 2016-02-26 02:00:00 37.9 -117.
## 4 4 2016-02-26 A1 2016-02-26 03:00:00 38.1 -115.
## 5 5 2016-02-26 A1 2016-02-26 04:00:00 35.4 -116.
## 6 6 2016-02-26 A1 2016-02-26 05:00:00 36.6 -114.
## 7 7 2016-02-26 A1 2016-02-26 06:00:00 36.9 -115.
## 8 8 2016-02-26 A1 2016-02-26 07:00:00 37.3 -115.
## 9 9 2016-02-26 A1 2016-02-26 08:00:00 35.6 -114.
## 10 10 2016-02-26 A1 2016-02-26 09:00:00 35.9 -114.
## # ... with 230 more rows
full_data%>%
summarise(max_long=max(long),
min_long=min(long),
max_lat=max(lat),
min_lat=min(lat))
## # A tibble: 1 x 4
## max_long min_long max_lat min_lat
## <dbl> <dbl> <dbl> <dbl>
## 1 -106. -123. 41.6 26.6
3. Use the range of the latitude and longitude from Q2 to build an appropriate bounding box for your map and load a map from stamen in a terrain style projection and display the map. Then, build a final map that overlays the recorded path from Q1.
lat<-c(26.6116,41.58802)
long<-c(-122.6017, -106.3343)
bbox<-make_bbox(long,lat,f=0.05)
collar_map<-get_map(bbox, maptype = "terrain", source="stamen")
## Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap, under ODbL.
i <- 1
for (i in 1:length(us_ic_data)){
us_individual <- as.data.frame(read_csv(us_ic_data[i]))
print(
ggmap(collar_map)+
geom_path(data=us_individual, aes(long,lat, color=time, size=.08, alpha=0.3))+
geom_point(data=us_individual, aes(long,lat))+
scale_size(range = c(4),
guide = "none") +
theme_solarized_2()+
theme(legend.position="left",
axis.text.x=element_text(angle=60, hjust=1))+
labs(title = "Collared Individual Path",
x="Longitude",
y="Latitude",
color="Date",
alpha=NULL,
size=NULL)
)
}










4. Create a summary of the hypertension data. How many individuals and phenotypes are included in this data set? How many gene markers and chromosomes are included in this data set? Please create a table to show the number of markers on each chromosome.
data(hyper)
summary(hyper)
## Backcross
##
## No. individuals: 250
##
## No. phenotypes: 2
## Percent phenotyped: 100 100
##
## No. chromosomes: 20
## Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## X chr: X
##
## Total markers: 174
## No. markers: 22 8 6 20 14 11 7 6 5 5 14 5 5 5 11 6 12 4 4 4
## Percent genotyped: 47.7
## Genotypes (%):
## Autosomes: BB:50.1 BA:49.9
## X chromosome: BY:53.0 AY:47.0
nind(hyper)
## [1] 250
nphe(hyper)
## [1] 2
totmar(hyper)
## [1] 174
nchr(hyper)
## [1] 20
nmar(hyper)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 X
## 22 8 6 20 14 11 7 6 5 5 14 5 5 5 11 6 12 4 4 4
plot(hyper)

5. Please make an interactive genetic map of markers for the hypertension data.
iplotMap(hyper)
## Set screen size to height=700 x width=1000
6. Make a plot shows the pattern of missing genotype data in the hypertension dataset. Please reorder the recorded individuals according to their blood pressure phenotypes. Is there a specific pattern of missing genotype? Please explain it.
plotMissing(hyper, main="")

plotMissing(hyper, main="", reorder=1)

These plots tell us which genotypes are missing. The reordered versions tell us specifically where certain phenotypic data is missing for specific individuals. Here it looks like individuals 50-200 are missing a ton of data.
7. Based on your answer from previous question, you probably noticed that there are gene markers without data. Please use the function drop.nullmarkers to remove markers that have no genotype data. After this, make a new summary to show the number of markers on each chromosome. How many gene markers were dropped? Where were the dropped markers located? Please use the data without nullmarkers for the following questions.
hyper2<-drop.nullmarkers(hyper)
summary(hyper2)
## Backcross
##
## No. individuals: 250
##
## No. phenotypes: 2
## Percent phenotyped: 100 100
##
## No. chromosomes: 20
## Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## X chr: X
##
## Total markers: 173
## No. markers: 22 8 6 20 14 11 7 6 5 5 14 5 5 4 11 6 12 4 4 4
## Percent genotyped: 48
## Genotypes (%):
## Autosomes: BB:50.1 BA:49.9
## X chromosome: BY:53.0 AY:47.0
summary(hyper)
## Backcross
##
## No. individuals: 250
##
## No. phenotypes: 2
## Percent phenotyped: 100 100
##
## No. chromosomes: 20
## Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## X chr: X
##
## Total markers: 174
## No. markers: 22 8 6 20 14 11 7 6 5 5 14 5 5 5 11 6 12 4 4 4
## Percent genotyped: 47.7
## Genotypes (%):
## Autosomes: BB:50.1 BA:49.9
## X chromosome: BY:53.0 AY:47.0
Only one marker was dropped on chromosome 14.
8. Please conduct single-QTL analysis and create a table to give the maximum LOD score on each chromosome based on their blood pressure phenotypes. Which gene marker has the higiest LOD score? Which chromosome contains the gene marker that has the highest LOD score? Then, creates an interactive chart with LOD curves from a genome scan for all chromosomes.
bp<-calc.genoprob(hyper,step=1)
qtl_data_hyper<-scanone(hyper)
## Warning in scanone(hyper): First running calc.genoprob.
summary(qtl_data_hyper)
## chr pos lod
## D1Mit334 1 49.2 3.527
## D2Mit62 2 54.6 1.445
## D3Mit11 3 37.2 0.781
## D4Mit164 4 29.5 8.094
## D5Mit31 5 66.7 1.545
## D6Mit188 6 21.9 1.821
## D7Mit297 7 26.2 0.400
## D8Mit271 8 59.0 0.791
## D9Mit18 9 68.9 0.750
## D10Mit214 10 15.3 0.247
## D11Mit35 11 43.7 0.626
## D12Mit37 12 1.1 0.429
## D13Mit78 13 59.0 0.313
## D14Mit7 14 52.5 0.106
## D15Mit152 15 17.5 1.705
## D16Mit70 16 51.4 0.370
## D17Mit46 17 3.3 0.207
## D18Mit17 18 14.2 0.506
## D19Mit59 19 0.0 0.792
## DXMit130 X 43.7 0.927
Looks like chromosome 4 (D4Mit164) has the gene with the highest LOD.
9. Based on your genome scan results, create a table which only includes those chromosomes with LOD > 1. Creates an interactive chart with LOD curves linked to estimated QTL effects for only those chromosomes with LOD > 1. Find the gene maker with the highest LOD score and describe how does the genetype of this marker influence the individual’s phenotype.
cute_lil_plot<-iplotScanone(qtl_data_hyper, chr=c(1,2,4,5,6,15))
cute_lil_plot
Higher LOD score gives us an indication of how likely it will be that an individual with homozygous expression at this gene will exhibit the phenotype of high blood pressure.
10.Please save your interactive chart from Q9 as a html file hyper_iplotScanone.html and make sure your upload it to your github repository with your lab14 homework as well.
htmlwidgets::saveWidget(cute_lil_plot, file="hyper_iplotScanone.html")